Problem


As it can be seen in 20_04_08_Ridge.html, there is a relation between the score the model assigns to a gene and the gene’s mean level of expression. This is a problem because we had previously discovered a bias in the SFARI scores related to mean level of expression (Preprocessing/Gandal/AllRegions/RMarkdowns/20_04_03_SFARI_genes.html), which means that this could be a confounding factor in our model and the reason why it seems to perform well, so we need to remove this bias to recover the true biological signal that is mixed with it and improve the quality of our model.


Post-Processing Approach


After the model has been trained with the bias, perform a post-processing of the classifier outputs.

Since the effect of the bias is proportional to the mean level of expression of a gene, we can correct it by removing the effect of the mean expression from the probability of the model.

Problems:

library(tidyverse)
library(knitr)
library(plotly) ; library(viridis) ; library(gridExtra) ; library(RColorBrewer)
library(biomaRt)
library(mgcv)
library(Rtsne)
library(knitr)
library(ROCR)
library(expss)

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Load data

# Clusterings
clustering_selected = 'DynamicHybridMergedSmall'
clusterings = read_csv('./../Data/clusters.csv')
clusterings$Module = clusterings[,clustering_selected] %>% data.frame %>% unlist %>% unname

assigned_module = data.frame('ID' = clusterings$ID, 'Module' = clusterings$Module)

# Regression data
load(file='./../Data/Ridge_model.RData')

# Mean Expression data
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame

# Dataset created with DynamicTreeMerged algorithm
clustering_selected = 'DynamicHybridMergedSmall'
original_dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'), row.names=1)

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org')
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=rownames(dataset), mart=mart)

rm(dds, datGenes, datMeta, clustering_selected, clusterings)


Remove Bias


The relation between level of expression and probability assigned by the model could be modelled by a linear regression, but we would lose some of the behaviour. Fitting a curve using Generalised Additive Models seems to capture the relation in a much better way, with an \(R^2\) twice as large and no recognisable pattern in the residuals of the regression.

plot_data = data.frame('ID'=rownames(datExpr), 'meanExpr'=rowMeans(datExpr)) %>% 
            right_join(test_set %>% mutate(ID=rownames(test_set)), by='ID')

# Fit linear and GAM models to data
lm_fit = lm(prob ~ meanExpr, data = plot_data)
gam_fit = gam(prob ~ s(meanExpr), method = 'REML', data = plot_data)

plot_data = plot_data %>% mutate(lm_res = lm_fit$residuals, gam_res = gam_fit$residuals)

# Plot data
p1 = plot_data %>% ggplot(aes(meanExpr, prob)) + geom_point(alpha=0.1, color='#0099cc') + geom_smooth(method='lm', color='gray', alpha=0.3) +
     xlab('Mean Expression') + ylab('Probability') + ggtitle('Linear Fit') + theme_minimal()

p2 = plot_data %>% ggplot(aes(meanExpr, prob)) + geom_point(alpha=0.1, color='#0099cc') + geom_smooth(method='gam', color='gray', alpha=0.3) +
     xlab('Mean Expression') + ylab('Probability') + ggtitle('GAM fit') + theme_minimal()

p3 = plot_data %>% ggplot(aes(meanExpr, lm_res)) + geom_point(alpha=0.1, color='#ff9900') + 
     geom_smooth(method='gam', color='gray', alpha=0.3) + xlab('Mean Expression') +
     ylab('Residuals') + theme_minimal() + ggtitle(bquote(paste(R^{2},' = ', .(round(summary(lm_fit)$r.squared, 4)))))

p4 = plot_data %>% ggplot(aes(meanExpr, gam_res)) + geom_point(alpha=0.1, color='#ff9900') + 
     geom_smooth(method='gam', color='gray', alpha=0.3) + xlab('Mean Expression') +
     ylab('Residuals') + theme_minimal() + ggtitle(bquote(paste(R^{2},' = ', .(round(summary(gam_fit)$r.sq, 4)))))


grid.arrange(p1, p2, p3, p4, nrow = 2)

rm(p1, p2, p3, p4, lm_fit)


Remove bias from scores with GAM fit


  • Assigning the residuals of the GAM model as the new model probability

  • Adding the mean probability of the original model to each new probability so our new probabilities have the same mean as the original ones

  • As with the plot above, the relation between mean expression and the probability assigned by the model is gone

# Correct Bias
test_set$corrected_score = gam_fit$residuals + mean(test_set$prob)

# Plot results
plot_data = data.frame('ID'=rownames(datExpr), 'meanExpr'=rowMeans(datExpr)) %>% 
            right_join(test_set %>% mutate(ID=rownames(test_set)), by='ID')

plot_data %>% ggplot(aes(meanExpr, corrected_score)) + geom_point(alpha=0.1, color='#0099cc') + 
              geom_smooth(method='gam', color='gray', alpha=0.3) + ylab('Corrected Score') + xlab('Mean Expression') +
              theme_minimal() + ggtitle('Mean expression vs Model score corrected using GAM')




Performance Metrics


Confusion matrix

conf_mat = test_set %>% mutate(corrected_pred = corrected_score>0.5) %>%
                        apply_labels(SFARI = 'Actual Labels', 
                                     corrected_score = 'Corrected Score', 
                                     corrected_pred = 'Corrected Label Prediction')

cro(conf_mat$SFARI, list(conf_mat$pred, total()))
 FALSE   TRUE   #Total 
 Actual Labels 
   FALSE  8031 4226 12257
   TRUE  81 96 177
   #Total cases  8112 4322 12434
rm(conf_mat)

Accuracy


The accuracy was expected to decrease because the bias was helping classify samples correctly, but for the wrong reasons. Although I was expecting the accuracy to change more.

old_acc = mean(test_set$SFARI==(test_set$prob>0.5))
acc = mean(test_set$SFARI==(test_set$corrected_score>0.5))

cat(paste0('Accuracy = ', round(acc,4)))
## Accuracy = 0.6491
cat(paste0('Accuracy decreased ',round(old_acc-acc,4), ' points'))
## Accuracy decreased 0.0045 points
rm(acc, old_acc)

ROC Curve


AUC decreased 0.04

pred_ROCR = prediction(test_set$corrected_score, test_set$SFARI)

roc_ROCR = performance(pred_ROCR, measure='tpr', x.measure='fpr')
AUC = performance(pred_ROCR, measure='auc')@y.values[[1]]

plot(roc_ROCR, main=paste0('ROC curve (AUC=',round(AUC,2),')'), col='#009999')
abline(a=0, b=1, col='#666666')

rm(roc_ROCR, AUC)

Lift Curve


Lift decreased from a starting point of almost 20 to just 5. The genes most affected seem to have been the ones with the highest scores

lift_ROCR = performance(pred_ROCR, measure='lift', x.measure='rpp')
plot(lift_ROCR, main='Lift curve', col='#86b300')

rm(lift_ROCR, pred_ROCR)

Analyse Model

Looks very similar to before, the means of each group are a bit closer together

plot_data = test_set %>% dplyr::select(corrected_score, SFARI)

ggplotly(plot_data %>% ggplot(aes(corrected_score, fill=SFARI, color=SFARI)) + geom_density(alpha=0.3) + xlab('Score') +
         geom_vline(xintercept = mean(plot_data$corrected_score[plot_data$SFARI]), color = '#00C0C2', linetype = 'dashed') +
         geom_vline(xintercept = mean(plot_data$corrected_score[!plot_data$SFARI]), color = '#FF7371', linetype = 'dashed') +
         theme_minimal() + ggtitle('Model score distribution by SFARI Label'))

The positive relation between SFARI scores and Model scores is still there but is not as strong as before

plot_data = test_set %>% mutate(ID=rownames(test_set)) %>% dplyr::select(ID, corrected_score) %>%
            left_join(original_dataset %>% mutate(ID=rownames(original_dataset)), by='ID') %>%
            dplyr::select(ID, corrected_score, gene.score) %>% apply_labels(gene.score='SFARI Gene score')

cro(plot_data$gene.score)
 #Total 
 SFARI Gene score 
   1  5
   2  12
   3  38
   4  86
   5  32
   6  4
   None  12257
   #Total cases  12434
ggplotly(plot_data %>% ggplot(aes(gene.score, corrected_score, fill=gene.score)) + geom_boxplot() + 
              scale_fill_manual(values=SFARI_colour_hue(r=c(1:6,8,7))) + 
              ggtitle('Distribution of the Model scores by SFARI score') +
              xlab('SFARI score') + ylab('Model score') + theme_minimal())

Print genes with highest corrected scores in test set

test_set %>% dplyr::select(corrected_score, SFARI) %>% mutate(ID = rownames(test_set)) %>% 
             arrange(desc(corrected_score)) %>% top_n(50, wt=corrected_score) %>%
             left_join(original_dataset %>% mutate(ID=rownames(original_dataset)), by='ID')  %>% 
             left_join(gene_names, by = c('ID'='ensembl_gene_id')) %>%
             dplyr::rename('GeneSymbol' = external_gene_id, 'Probability' = corrected_score,
                           'ModuleDiagnosis_corr' = MTcor, 'GeneSignificance' = GS) %>%
             mutate(ModuleDiagnosis_corr = round(ModuleDiagnosis_corr,4), Probability = round(Probability,4), 
                    GeneSignificance = round(GeneSignificance,4)) %>%
             dplyr::select(GeneSymbol, GeneSignificance, ModuleDiagnosis_corr, Module, Probability, gene.score) %>%
             kable(caption = 'Genes with highest model probabilities from the test set')
Genes with highest model probabilities from the test set
GeneSymbol GeneSignificance ModuleDiagnosis_corr Module Probability gene.score
SAP130 -0.2482 -0.6031 #00BA38 0.8324 None
FMNL1 -0.2223 -0.6031 #00BA38 0.8252 None
RPRD2 -0.0922 -0.6031 #00BA38 0.8192 None
HUNK -0.3273 -0.6750 #D376FF 0.8190 None
MIDN -0.0520 -0.6031 #00BA38 0.8182 None
POM121C -0.2791 -0.6031 #00BA38 0.8164 None
AMPD3 -0.2810 -0.9514 #00C0AF 0.8132 None
DROSHA -0.4111 -0.6031 #00BA38 0.8112 None
PCDHB13 -0.0396 -0.4946 #B79F00 0.8104 None
SCAF4 0.0185 -0.6031 #00BA38 0.8095 None
BCL9 0.1299 -0.6031 #00BA38 0.8060 None
FOXJ2 0.2507 0.1127 #FF62BC 0.7993 None
RNF111 -0.2410 0.0586 #FE6E8A 0.7982 None
RFX7 0.1372 0.0586 #FE6E8A 0.7963 None
CACNG3 -0.4689 -0.6031 #00BA38 0.7954 None
CHAMP1 -0.4093 -0.6031 #00BA38 0.7953 None
ITGAX -0.0073 -0.0094 #00A7FF 0.7950 None
FOXP4 -0.2102 -0.6031 #00BA38 0.7931 None
CACNG2 -0.6778 -0.6031 #00BA38 0.7925 None
UBLCP1 -0.4921 -0.9514 #00C0AF 0.7865 None
ZNF385A -0.3231 -0.6031 #00BA38 0.7860 None
PLAGL2 0.3941 0.1127 #FF62BC 0.7856 None
SMG7 -0.2560 -0.6031 #00BA38 0.7817 None
TCF7L2 0.1338 -0.6031 #00BA38 0.7811 3
ATF7 0.1072 -0.6031 #00BA38 0.7807 None
R3HDM2 -0.4078 -0.6031 #00BA38 0.7781 None
NOS1 -0.4703 -0.6031 #00BA38 0.7772 None
GATAD2B -0.4221 -0.6031 #00BA38 0.7772 None
FAM193A 0.0355 0.0586 #FE6E8A 0.7763 None
TICAM1 -0.3119 -0.6031 #00BA38 0.7747 None
PATL1 -0.1506 0.1127 #FF62BC 0.7744 None
ATN1 -0.2052 -0.6031 #00BA38 0.7744 None
SP2 -0.0882 -0.6031 #00BA38 0.7738 None
TBC1D8 0.0969 0.7287 #39B600 0.7728 None
SLC7A8 -0.5805 -0.9514 #00C0AF 0.7721 None
GRIK5 -0.3145 -0.6031 #00BA38 0.7705 3
UBAP2L -0.3318 -0.6031 #00BA38 0.7700 None
SERPINB8 0.2042 0.1127 #FF62BC 0.7697 None
PACSIN3 -0.0825 -0.9514 #00C0AF 0.7689 None
TRIM67 -0.0556 0.1127 #FF62BC 0.7687 None
EP400 -0.1671 -0.6031 #00BA38 0.7687 3
PCGF2 -0.3530 -0.6031 #00BA38 0.7680 None
SBK1 -0.3553 -0.6031 #00BA38 0.7677 None
TRAPPC1 -0.4606 -0.6031 #00BA38 0.7670 None
CCNK 0.2505 0.7916 #00C097 0.7665 None
SLC32A1 -0.5797 -0.6031 #00BA38 0.7657 None
CCRN4L -0.0285 -0.0094 #00A7FF 0.7654 None
FAM222B -0.3060 -0.6031 #00BA38 0.7621 None
AQP1 -0.0924 -0.9514 #00C0AF 0.7619 None
JAZF1 -0.3136 -0.9514 #00C0AF 0.7608 None



Negative samples distribution


  • The genes with the highest scores were affected the most as a group

  • More genes got their score increased than decreased but on average, the ones that got it decreased had a bigger change

negative_set = test_set %>% filter(!SFARI)

negative_set %>% mutate(diff = abs(prob-corrected_score)) %>% 
             ggplot(aes(prob, corrected_score, color = diff)) + geom_point(alpha=0.2) + scale_color_viridis() + 
             geom_abline(slope=1, intercept=0, color='gray', linetype='dashed') + 
             geom_smooth(color='#666666', alpha=0.5, se=TRUE, size=0.5) + coord_fixed() +
             xlab('Original probability') + ylab('Corrected probability') + theme_minimal() + theme(legend.position = 'none')

negative_set_table = negative_set %>% mutate(corrected_pred = corrected_score>0.5) %>%
                     apply_labels(corrected_score = 'Corrected Probability', 
                                  corrected_pred = 'Corrected Class Prediction',
                                  pred = 'Original Class Prediction')

cro(negative_set_table$pred, list(negative_set_table$corrected_pred, total()))
 Corrected Class Prediction     #Total 
 FALSE   TRUE   
 Original Class Prediction 
   FALSE  7478 553   8031
   TRUE  498 3728   4226
   #Total cases  7976 4281   12257
cat(paste0('\n', round(100*mean(negative_set_table$corrected_pred == negative_set_table$pred)),
           '% of the genes maintained their original predicted class'))
## 
## 91% of the genes maintained their original predicted class
rm(negative_set_table)

Probability and Gene Significance


The relation is not as strong as before in the highest scores

*The transparent verison of the trend line is the original trend line

negative_set %>% ggplot(aes(corrected_score, GS, color=MTcor)) + geom_point() + geom_smooth(method='gam', color='#666666') +
                 geom_line(stat='smooth', method='gam', color='#666666', alpha=0.5, size=1.2, aes(x=prob)) +
                 geom_hline(yintercept=mean(negative_set$GS), color='gray', linetype='dashed') +
                 scale_color_gradientn(colours=c('#F8766D','white','#00BFC4')) + xlab('Corrected Score') +
                 ggtitle('Relation between the Model\'s Corrected Score and Gene Significance') + theme_minimal()

Summarised version of score vs mean expression, plotting by module instead of by gene

The difference in the trend lines between this plot and the one above is that the one above takes all the points into consideration while this considers each module as an observation by itself, so the top one is strongly affected by big modules and the bottom one treats all modules the same

The transparent version of each point and trend lines are the original values and trends before the bias correction

  • The average score for the modules with negative correlation decreased a little and for the ones with positive correlation increased a little
plot_data = negative_set %>% group_by(MTcor) %>% summarise(mean = mean(prob), sd = sd(prob),new_mean = mean(corrected_score),
                                                           new_sd = sd(corrected_score), n = n()) %>%
            mutate(MTcor_sign = ifelse(MTcor>0, 'Positive', 'Negative')) %>% left_join(original_dataset, by='MTcor') %>%
            dplyr::select(Module, MTcor, MTcor_sign, mean, new_mean, sd, new_sd, n) %>% distinct()
colnames(plot_data)[1] = 'ID'

ggplotly(plot_data %>% ggplot(aes(MTcor, new_mean, size=n, color=MTcor_sign)) + geom_point(aes(id = ID)) + 
         geom_smooth(method='loess', color='gray', se=FALSE) + geom_smooth(method='lm', se=FALSE) + 
         geom_point(aes(y=mean), alpha=0.3) + xlab('Module-Diagnosis correlation') + ylab('Mean Corrected Score by the Model') + 
         geom_line(stat='smooth', method='loess', color='gray', se=FALSE, alpha=0.3, size=1.2, aes(y=mean)) + 
         geom_line(stat='smooth', method='lm', se=FALSE, alpha=0.3, size=1.2, aes(y=mean)) + 
         theme_minimal() + theme(legend.position='none'))


Probability and mean level of expression


Check if correcting by gene also corrected by module: Yes, but not enough to remove the bias completely

mean_and_sd = data.frame(ID=rownames(datExpr), meanExpr=rowMeans(datExpr), sdExpr=apply(datExpr,1,sd))

plot_data = negative_set %>% mutate(ID=rownames(test_set)[!test_set$SFARI]) %>% left_join(mean_and_sd, by='ID') %>% 
            left_join(original_dataset %>% mutate(ID=rownames(original_dataset)) %>% 
                      dplyr::select(ID, Module), by='ID')

plot_data2 = plot_data %>% group_by(Module) %>% summarise(meanExpr = mean(meanExpr), meanProb = mean(prob), 
                                                          new_meanProb = mean(corrected_score), n=n())

ggplotly(plot_data2 %>% ggplot(aes(meanExpr, new_meanProb, size=n)) + 
         geom_point(color=plot_data2$Module) + geom_point(color=plot_data2$Module, alpha=0.3, aes(y=meanProb)) + 
         geom_smooth(method='loess', se=TRUE, color='gray', alpha=0.1, size=0.7) + 
         geom_line(stat='smooth', method='loess', se=TRUE, color='gray', alpha=0.4, size=1.2, aes(y=meanProb)) +
         theme_minimal() + theme(legend.position='none') + xlab('Mean Expression') + ylab('Corrected Probability') +
         ggtitle('Mean expression vs corrected Model score by Module'))
rm(plot_data2, mean_and_sd)


Probability and SD of level of expression


The relation between SD and score became bigger than before

plot_data %>% ggplot(aes(sdExpr, corrected_score)) + geom_point(alpha=0.1, color='#0099cc') + 
              geom_smooth(method='lm', color='#999999', se=FALSE, alpha=1) + xlab('SD') + ylab('Corrected Probability') +
              geom_line(stat='smooth', method='lm', color='#999999', se=FALSE, alpha=0.4, size=1.5, aes(y=prob)) + 
              theme_minimal() + ggtitle('SD vs model probability by gene') + scale_x_sqrt()

This fitted curve looks like the opposite of the trend found between mean/sd and model scores

plot_data %>% ggplot(aes(meanExpr, sdExpr)) + geom_point(alpha=0.1, color='#0099cc') + 
              geom_smooth(method='loess', color='gray', alpha=0.3) + 
              geom_smooth(method='lm', color='#999999', se=FALSE, alpha=1) + 
              scale_x_log10() + scale_y_log10() +  xlab('Mean Expression') + ylab('SD of Expression') +
              theme_minimal() + ggtitle('Mean expression vs SD by gene')


Probability and lfc


The relation seems to have gotten a bit stronger for the over-expressed genes and a bit weaker for the under-expressed genes

plot_data = negative_set %>% mutate(ID=rownames(test_set)[!test_set$SFARI]) %>% 
            left_join(DE_info %>% mutate(ID=rownames(DE_info)), by='ID')

plot_data %>% ggplot(aes(log2FoldChange, corrected_score)) + geom_point(alpha=0.1, color='#0099cc') + 
              geom_smooth(method='loess', color='gray', alpha=0.1) + xlab('LFC') + ylab('Corrected Probability') +
              geom_line(stat='smooth', method='loess', color='gray', alpha=0.4, size=1.5, aes(y=prob)) +
              theme_minimal() + ggtitle('LFC vs model probability by gene')

plot_data %>% mutate(DE = padj<0.05) %>% ggplot(aes(log2FoldChange, corrected_score, color=DE)) + geom_point(alpha=0.1) + 
             geom_smooth(method='loess', alpha=0.1) + xlab('LFC') + ylab('Corrected Probability') + 
              geom_line(stat='smooth', method='loess', alpha=0.4, size=1.5, aes(y=prob, color = DE)) +
              theme_minimal() + ggtitle('LFC vs model probability by gene')


Probability and Module-Diagnosis correlation


Not much change

module_score = negative_set %>% mutate(ID=rownames(test_set)[!test_set$SFARI]) %>%
               left_join(original_dataset %>% mutate(ID = rownames(original_dataset)), by='ID') %>%
               dplyr::select(ID, prob, corrected_score, Module, MTcor.x) %>% rename(MTcor = MTcor.x) %>% 
               left_join(data.frame(MTcor=unique(dataset$MTcor)) %>% arrange(by=MTcor) %>% 
                         mutate(order=1:length(unique(dataset$MTcor))), by='MTcor')

ggplotly(module_score %>% ggplot(aes(MTcor, corrected_score)) + geom_point(color=module_score$Module, aes(id=ID, alpha=corrected_score^4)) +
         geom_hline(yintercept=mean(module_score$corrected_score), color='gray', linetype='dotted') + 
         geom_line(stat='smooth', method = 'loess', color='gray', alpha=0.5, size=1.5, aes(x=MTcor, y=prob)) +
         geom_smooth(color='gray', method = 'loess', se = FALSE, alpha=0.3) + theme_minimal() + 
         xlab('Module-Diagnosis correlation') + ylab('Corrected Score'))



Conclusion


This bias correction seems to be working but we may be losing a bit of biological signal on the way, mainly for under-expressed genes


Saving results

write.csv(dataset, file='./../Data/BC_post_proc_model.csv', row.names = TRUE)




Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] expss_0.10.2       ROCR_1.0-7         gplots_3.0.3       Rtsne_0.15        
##  [5] mgcv_1.8-31        nlme_3.1-147       biomaRt_2.40.5     RColorBrewer_1.1-2
##  [9] gridExtra_2.3      viridis_0.5.1      viridisLite_0.3.0  plotly_4.9.2      
## [13] knitr_1.28         forcats_0.5.0      stringr_1.4.0      dplyr_0.8.5       
## [17] purrr_0.3.3        readr_1.3.1        tidyr_1.0.2        tibble_3.0.0      
## [21] ggplot2_3.3.0      tidyverse_1.3.0   
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1                backports_1.1.5            
##   [3] Hmisc_4.4-0                 plyr_1.8.6                 
##   [5] lazyeval_0.2.2              splines_3.6.3              
##   [7] crosstalk_1.1.0.1           BiocParallel_1.18.1        
##   [9] GenomeInfoDb_1.20.0         digest_0.6.25              
##  [11] foreach_1.5.0               htmltools_0.4.0            
##  [13] gdata_2.18.0                fansi_0.4.1                
##  [15] magrittr_1.5                checkmate_2.0.0            
##  [17] memoise_1.1.0               cluster_2.1.0              
##  [19] annotate_1.62.0             recipes_0.1.10             
##  [21] modelr_0.1.6                gower_0.2.1                
##  [23] matrixStats_0.56.0          prettyunits_1.1.1          
##  [25] jpeg_0.1-8.1                colorspace_1.4-1           
##  [27] blob_1.2.1                  rvest_0.3.5                
##  [29] haven_2.2.0                 xfun_0.12                  
##  [31] crayon_1.3.4                RCurl_1.98-1.1             
##  [33] jsonlite_1.6.1              genefilter_1.66.0          
##  [35] survival_3.1-11             iterators_1.0.12           
##  [37] glue_1.3.2                  gtable_0.3.0               
##  [39] ipred_0.9-9                 zlibbioc_1.30.0            
##  [41] XVector_0.24.0              DelayedArray_0.10.0        
##  [43] shape_1.4.4                 BiocGenerics_0.30.0        
##  [45] scales_1.1.0                DBI_1.1.0                  
##  [47] Rcpp_1.0.4                  xtable_1.8-4               
##  [49] progress_1.2.2              htmlTable_1.13.3           
##  [51] foreign_0.8-75              bit_1.1-15.2               
##  [53] Formula_1.2-3               stats4_3.6.3               
##  [55] lava_1.6.7                  prodlim_2019.11.13         
##  [57] glmnet_3.0-2                htmlwidgets_1.5.1          
##  [59] httr_1.4.1                  acepack_1.4.1              
##  [61] ellipsis_0.3.0              farver_2.0.3               
##  [63] pkgconfig_2.0.3             XML_3.99-0.3               
##  [65] nnet_7.3-13                 dbplyr_1.4.2               
##  [67] locfit_1.5-9.4              caret_6.0-86               
##  [69] labeling_0.3                tidyselect_1.0.0           
##  [71] rlang_0.4.5                 reshape2_1.4.3             
##  [73] AnnotationDbi_1.46.1        munsell_0.5.0              
##  [75] cellranger_1.1.0            tools_3.6.3                
##  [77] cli_2.0.2                   generics_0.0.2             
##  [79] RSQLite_2.2.0               broom_0.5.5                
##  [81] evaluate_0.14               yaml_2.2.1                 
##  [83] ModelMetrics_1.2.2.2        bit64_0.9-7                
##  [85] fs_1.4.0                    caTools_1.18.0             
##  [87] xml2_1.2.5                  compiler_3.6.3             
##  [89] rstudioapi_0.11             curl_4.3                   
##  [91] png_0.1-7                   reprex_0.3.0               
##  [93] geneplotter_1.62.0          stringi_1.4.6              
##  [95] highr_0.8                   lattice_0.20-40            
##  [97] Matrix_1.2-18               vctrs_0.2.4                
##  [99] pillar_1.4.3                lifecycle_0.2.0            
## [101] data.table_1.12.8           bitops_1.0-6               
## [103] GenomicRanges_1.36.1        R6_2.4.1                   
## [105] latticeExtra_0.6-29         KernSmooth_2.23-16         
## [107] IRanges_2.18.3              codetools_0.2-16           
## [109] MASS_7.3-51.5               gtools_3.8.2               
## [111] assertthat_0.2.1            SummarizedExperiment_1.14.1
## [113] DESeq2_1.24.0               withr_2.1.2                
## [115] S4Vectors_0.22.1            GenomeInfoDbData_1.2.1     
## [117] parallel_3.6.3              hms_0.5.3                  
## [119] grid_3.6.3                  rpart_4.1-15               
## [121] timeDate_3043.102           class_7.3-16               
## [123] rmarkdown_2.1               pROC_1.16.2                
## [125] Biobase_2.44.0              lubridate_1.7.4            
## [127] base64enc_0.1-3